
rm(list = ls())

## load libraries
library(tidyverse)
library(dbplyr)
library(dplyr)
library(RPostgres)
library(DBI)
library(config)
library(caret)
library(haven)
library(zoo)
library(bizdays)

cal <- create.calendar('mycal',
                       weekdays=c("saturday", "sunday"))

begin_date <- "1980-01-01"
end_date   <- "2022-12-31"


if(exists("wrds")){
  dbDisconnect(wrds)
}


## WRDS connection
user <- rstudioapi::askForPassword(prompt = "Enter WRDS Username")
pw <- rstudioapi::askForPassword(prompt = "Enter WRDS Password")
wrds <- dbConnect(Postgres(),
                  host='wrds-pgdata.wharton.upenn.edu',
                  port=9737,
                  sslmode='require',
                  dbname='wrds',
                  user=user,
                  password=pw)


comp.funda <- tbl(wrds,in_schema("comp","funda"))
comp.company <- tbl(wrds,in_schema("comp","company"))



################################################################################
# CRSP stocknames for IBES merge ###############################################
################################################################################

ibes_crsp_link <-
  tbl(wrds, in_schema("crsp", "stocknames")) %>%
  collect()

write_dta(ibes_crsp_link,"return_test/data/ibes_crsp_link.dta")

ibes_crsp <- ibes_crsp_link %>% 
  mutate(cusip = if_else(ncusip != "", ncusip, cusip)) %>% 
  arrange(permno, cusip) %>% 
  group_by(permno, cusip) %>% 
  mutate(min_dt = min(namedt),
         max_dt = max(nameenddt)) %>% 
  ungroup() %>% 
  mutate(namedt    = min_dt,
         nameenddt = max_dt) %>% 
  distinct(permno, cusip, .keep_all = TRUE) %>% 
  rename(ticker_crsp = ticker) %>% 
  arrange(cusip) %>% 
  mutate(cusip_6 = substr(cusip, 1, 6)) %>% 
  select(cusip_6, permno, ticker_crsp, namedt, nameenddt)

write_dta(ibes_crsp,"return_test/data/ibes_crsp_link_to_merge.dta")



################################################################################
# CCM Quarterly Data ###########################################################
################################################################################

## get ccm link table
linktable <- dbSendQuery(wrds,"select GVKEY, LPERMNO, LINKDT, LINKENDDT,
                         LINKTYPE, LINKPRIM
                         from crsp.ccmxpf_lnkhist")

data_ccmlink <- dbFetch(linktable, n = -1) 


chosen_gvkey <- unique(data_ccmlink$gvkey)


## get the compustat fundamentals quarterly file
data_fundq <-
  tbl(wrds, in_schema("comp", "fundq")) %>%
  filter(indfmt  == 'INDL' &
           datafmt == 'STD'&
           popsrc  == 'D' &
           consol  == 'C' &
           between(datadate, begin_date, end_date) &
           gvkey %in% chosen_gvkey) %>%
  select(gvkey, datadate, conm, fyearq, fqtr, fyr, cstat_cusip=cusip, cik, 
         cstat_ticker= tic, conm, exchg, rdq
  ) %>%
  inner_join(select(comp.company, gvkey, sic), by = "gvkey") %>%
  distinct() %>%
  collect()


## merge link table with compustat fundamentals quarterly file
data_ccmq <-  data_ccmlink %>%
  # primary links only
  filter(linktype %in% c("LU", "LC")) %>% #, "LS")) %>%
  filter(linkprim %in% c("P", "C")) %>% #, "J")) %>%
  base::merge(data_fundq, by="gvkey") %>%
  mutate(datadate  = as.Date(datadate), 
         permno    = as.factor(lpermno),
         linkdt    = as.Date(linkdt),
         linkenddt = as.Date(linkenddt),
         linktype  = factor(linktype, levels = c("LC", "LU")),#, "LS")),
         linkprim  = factor(linkprim, levels = c("P", "C"))) %>% #, "J"))) %>%
  filter(datadate  >= linkdt & (datadate <= linkenddt | is.na(linkenddt))) %>%
  arrange(datadate, permno, linktype, linkprim) %>%
  distinct(datadate, permno, .keep_all = TRUE)

write_dta(data_ccmq, "return_test/data/ccm_data_quarterly.dta", version = 14)


# Get ibes earnings announcement dates
ibes_ea_dates <-
  tbl(wrds, in_schema("ibes", "actu_epsus")) %>%
  filter(between(pends, begin_date, end_date),
         !is.na(value),
         !is.na(anndats)) %>%
  mutate(cusip_6 = substr(cusip, 1, 6)) %>%
  select(ticker, cusip_6, pends, anndats, anntims) %>%
  distinct() %>%
  collect()

ibes_ea_dates <- ibes_ea_dates %>% 
  mutate(pends_year  = lubridate::year(pends),
         pends_month = lubridate::month(pends)) %>% 
  distinct(cusip_6, pends_year, pends_month, .keep_all = TRUE)

write_dta(ibes_ea_dates, "return_test/data/ibes_ea_dates.dta", version = 14)



################################################################################
# IBES unadjusted detail file ##################################################
################################################################################

ibes_detu_epsus <-
  tbl(wrds, in_schema("ibes", "detu_epsus")) %>%
  filter(between(fpedats, begin_date, end_date)) %>% 
  collect()

write_dta(ibes_detu_epsus,"return_test/data/ibes_detu_epsus.dta")



################################################################################
# IBES unadjusted actuals file #################################################
################################################################################

ibes_actu_epsus <-
  tbl(wrds, in_schema("ibes", "actu_epsus")) %>%
  filter(between(pends, begin_date, end_date),
         !is.na(value),
         !is.na(anndats)) %>% 
  collect()

write_dta(ibes_actu_epsus,"return_test/data/ibes_actu_epsus.dta")



################################################################################
# CRSP Monthly Return Data #####################################################
################################################################################

monthly_delisting_returns <-
  tbl(wrds, in_schema("crsp", "msedelist")) %>%
  select(permno, dlstdt, dlret, dlstcd)

monthly_returns <-
  tbl(wrds, in_schema("crsp", "msf")) %>%
  filter(
    permno %in% chosen_permno,
    between(date, begin_date, end_date)
  ) %>%
  mutate(begdt = sql("date - interval '1 month'")) %>%
  group_by(permno) %>%
  mutate(
    Price  = abs(prc),
    MVE    = (Price * shrout) / 1000, # divide by 1000 to adjust to Compu
  ) %>%
  ungroup() %>%
  select(permno, date, ret, Price, MVE, begdt)

monthly_with_delistings <-
  monthly_returns %>%
  left_join(monthly_delisting_returns,
            sql_on = paste0(
              '("LHS".permno = "RHS".permno) AND ',
              '("LHS".date   >= "RHS".dlstdt) AND ',
              '("LHS".begdt  <= "RHS".dlstdt) '
            )
  ) %>%
  rename(permno = permno.x) %>%
  select(-permno.y) %>%
  collect()

monthly_compinfo <-
  tbl(wrds, in_schema("crsp", "msenames")) %>%
  filter(
    shrcd %in% c(10, 11),
    exchcd %in% c(1, 2, 3),
    primexch %in% c("N", "A", "Q")
  ) %>%
  select(permno, namedt, nameendt, exchcd, shrcd) %>%
  collect()

mcrsp <-
  monthly_with_delistings %>%
  inner_join(monthly_compinfo, by="permno") %>%
  arrange(permno, date, dlstcd) %>%
  group_by(permno, date, dlstcd) %>%
  mutate(max_nameendt = max(nameendt)) %>%
  ungroup() %>%
  filter((date >= namedt & date <= nameendt) |
           (date >= nameendt & nameendt == max_nameendt & !is.na(dlstcd))) %>%
  mutate(
    delret_impute = case_when(
      is.na(dlret) == T & (dlstcd==500 | (dlstcd>=520 & dlstcd<=584)) & (exchcd==1 | exchcd==2) ~ 1,
      is.na(dlret) == T & (dlstcd==500 | (dlstcd>=520 & dlstcd<=584)) & exchcd==3               ~ 1,
      TRUE ~ 0),
    delret = case_when(
      is.na(dlret) == T & (dlstcd==500 | (dlstcd>=520 & dlstcd<=584)) & (exchcd==1 | exchcd==2) ~ -0.3,
      is.na(dlret) == T & (dlstcd==500 | (dlstcd>=520 & dlstcd<=584)) & exchcd==3               ~ -0.55,
      TRUE ~ dlret)
  ) %>%
  select(-namedt, -nameendt, -max_nameendt, -begdt) %>%
  collect()

rm(monthly_with_delistings, monthly_compinfo)

monthly_indices <-
  tbl(wrds, in_schema("crsp", "msi")) %>%
  select(date, MktRet=vwretd, MktEWRet=ewretd) %>%
  filter(between(date, begin_date, end_date)) %>%
  collect()

dbDisconnect(wrds)


data_merge_uncertainty <- mcrsp %>% 
  arrange(permno, date) %>% 
  group_by(permno) %>% 
  mutate(min_date = min(date)) %>% 
  ungroup() %>% 
  mutate(min_year = lubridate::year(min_date),
         year     = lubridate::year(date),
         age      = year - min_year) %>% 
  filter(lubridate::month(date) == 1,
         !is.na(MVE)) %>% 
  rename(anndats_year = year,
         PERMNO       = permno) %>% 
  select(PERMNO, anndats_year, age, MVE)

write_dta(data_merge_uncertainty,"return_test/data/uncertainty_data_CRSP.dta", version= 14)



